import pandas as pd
import numpy as np
import pandas as pd
# Read the Excel file
df = pd.read_excel('Datasets/ElectricityByCounty.xlsx')
# Reshape the DataFrame
df = df.melt(id_vars=['County', 'Sector'],
var_name='Timestamp', value_name='Yearly Data')
# Convert the "Timestamp" column to datetime format representing the year
df['Timestamp'] = pd.to_datetime(df['Timestamp'], format='%Y')
# Sort the DataFrame by County and Timestamp
df = df.sort_values(['County', 'Timestamp'])
# Print the resulting DataFrame
df_indexd = df.set_index('Timestamp')
df_indexd
| County | Sector | Yearly Data | |
|---|---|---|---|
| Timestamp | |||
| 1990-01-01 | ALAMEDA | Non-Residential | 7109.299895 |
| 1990-01-01 | ALAMEDA | Residential | 2498.265626 |
| 1990-01-01 | ALAMEDA | Total | 9607.565521 |
| 1991-01-01 | ALAMEDA | Non-Residential | 6809.573768 |
| 1991-01-01 | ALAMEDA | Residential | 2515.209970 |
| ... | ... | ... | ... |
| 2020-01-01 | YUBA | Residential | 251.861806 |
| 2020-01-01 | YUBA | Total | 567.007959 |
| 2021-01-01 | YUBA | Non-Residential | 315.822249 |
| 2021-01-01 | YUBA | Residential | 260.500675 |
| 2021-01-01 | YUBA | Total | 576.322924 |
5568 rows × 3 columns
import pandas as pd
import plotly.graph_objects as go
from statsmodels.tsa.arima.model import ARIMA
from dateutil.relativedelta import relativedelta
import numpy as np
from pmdarima import auto_arima
# Read the Excel file
df = pd.read_excel('Datasets/ElectricityByCounty.xlsx')
# Reshape the DataFrame
df = df.melt(id_vars=['County', 'Sector'],
var_name='Timestamp', value_name='Yearly Data')
# Convert the "Timestamp" column to datetime format representing the year
df['Timestamp'] = pd.to_datetime(df['Timestamp'], format='%Y')
# Sort the DataFrame by County and Timestamp
df = df.sort_values(['County', 'Timestamp'])
# Iterate over each country and sector
for country in df['County'].unique():
for sector in df['Sector'].unique():
fig = go.Figure()
# Get the energy consumption data for the current country and sector
df_filter = df[(df['County'] == country) & (
df['Sector'] == sector)][['Timestamp', 'Yearly Data']]
df_filter_index = df_filter.set_index('Timestamp')
# Prepare the data for modeling
years = df_filter_index.index
energy_consumption = df_filter_index.values.flatten()
# Split the data into training and testing
# Use all data except the last 5 years for training
train_data = energy_consumption[:-5]
test_data = energy_consumption[-5:] # Use the last 5 years for testing
# Fit the auto ARIMA model
model = auto_arima(train_data, seasonal=False)
model.fit(train_data)
# Generate predictions
predictions = model.predict(n_periods=len(test_data))
# Plot the training data
fig.add_trace(go.Scatter(
x=years[:-5], y=train_data, mode='lines+markers', name='Training Data'))
# Plot the predictions
fig.add_trace(go.Scatter(
x=years[-5:], y=test_data, mode='lines+markers', name='Testing Data'))
fig.add_trace(go.Scatter(
x=years[-5:], y=predictions, mode='lines+markers', name='Predictions'))
# Update the layout
fig.update_layout(title=f'Energy Consumption Forecast Country : {country} : Sector {sector} ',
xaxis_title='Year', yaxis_title='Energy Consumption')
# Show the plot
fig.show()